import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
plt.style.use('seaborn')
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
#read the data
std = pd.read_csv('time_std.csv')
e7 = pd.read_csv('time_e7.csv')
std = std[['date','cost_mean','cost_median','cost_sum','usage']]
e7 = e7[['date','cost_mean','cost_median','cost_sum','usage']]
# groupby 'date', and use the mean value to analysis
std_low = std[std['usage']=='low'][['date','cost_mean']].groupby('date').mean()
std_med = std[std['usage']=='med'][['date','cost_mean']].groupby('date').mean()
std_hig = std[std['usage']=='hig'][['date','cost_mean']].groupby('date').mean()
e7_low = e7[e7['usage']=='low'][['date','cost_mean']].groupby('date').mean()
e7_med = e7[e7['usage']=='med'][['date','cost_mean']].groupby('date').mean()
e7_hig = e7[e7['usage']=='hig'][['date','cost_mean']].groupby('date').mean()
std_low.index = pd.to_datetime(std_low.index)
std_med.index = pd.to_datetime(std_med.index)
std_hig.index = pd.to_datetime(std_hig.index)
e7_low.index=pd.to_datetime(e7_low.index)
e7_hig.index=pd.to_datetime(e7_hig.index)
e7_med.index=pd.to_datetime(e7_med.index)
std_low.reset_index(inplace=True)
std_med.reset_index(inplace=True)
std_hig.reset_index(inplace=True)
e7_low.reset_index(inplace=True)
e7_med.reset_index(inplace=True)
e7_hig.reset_index(inplace=True)
# Create confidence level
std_low['low_CI'] = std_low['cost_mean'] - np.std(std_low['cost_mean'])
std_low['upper_CI'] = std_low['cost_mean'] + np.std(std_low['cost_mean'])
std_med['low_CI'] = std_med['cost_mean'] - np.std(std_med['cost_mean'])
std_med['upper_CI'] = std_med['cost_mean'] + np.std(std_med['cost_mean'])
std_hig['low_CI'] = std_hig['cost_mean'] - np.std(std_hig['cost_mean'])
std_hig['upper_CI'] = std_hig['cost_mean'] + np.std(std_hig['cost_mean'])
e7_low['low_CI'] = e7_low['cost_mean'] - np.std(e7_low['cost_mean'])
e7_low['upper_CI'] = e7_low['cost_mean'] + np.std(e7_low['cost_mean'])
e7_med['low_CI'] = e7_med['cost_mean'] - np.std(e7_med['cost_mean'])
e7_med['upper_CI'] = e7_med['cost_mean'] + np.std(e7_med['cost_mean'])
e7_hig['low_CI'] = e7_hig['cost_mean'] - np.std(e7_hig['cost_mean'])
e7_hig['upper_CI'] = e7_hig['cost_mean'] + np.std(e7_hig['cost_mean'])
std_low = std_low[std_low['date']>='20120201']
std_med = std_med[std_med['date']>='20120201']
std_hig = std_hig[std_hig['date']>='20120201']
e7_low = e7_low[e7_low['date']>='20120201']
e7_med = e7_med[e7_med['date']>='20120201']
e7_hig = e7_hig[e7_hig['date']>='20120201']
plt.figure(figsize=(15,5),dpi=300)
plt.plot(std_low['date'],std_low['cost_mean'],lw=2, alpha = 1,label='std_low')
plt.fill_between(std_low['date'], std_low['low_CI'], std_low['upper_CI'], alpha = 0.3,label='std_low_CI')
plt.plot(e7_low['date'],e7_low['cost_mean'],lw=2, alpha = 1,label='e7_low')
plt.fill_between(e7_low['date'], e7_low['low_CI'], e7_low['upper_CI'], alpha = 0.3,label='e7_low_CI')
plt.title('STD vs E7, low')
plt.legend(loc='best',fontsize=10)
plt.savefig('low.jpg')
plt.figure(figsize=(15,5),dpi=300)
plt.plot(std_med['date'],std_med['cost_mean'],lw=2, alpha = 1,label='std_med')
plt.fill_between(std_med['date'], std_med['low_CI'], std_med['upper_CI'], alpha = 0.3,label='std_med_CI')
plt.plot(e7_med['date'],e7_med['cost_mean'],lw=2, alpha = 1,label='e7_med')
plt.fill_between(e7_med['date'], e7_med['low_CI'], e7_med['upper_CI'], alpha = 0.3,label='e7_med_CI')
plt.title('STD vs E7, med')
plt.legend(loc='best',fontsize=10)
plt.savefig('med.jpg')
plt.figure(figsize=(15,5),dpi=300)
plt.plot(std_hig['date'],std_hig['cost_mean'],lw=2, alpha = 1,label='std_hig')
plt.fill_between(std_hig['date'], std_hig['low_CI'], std_hig['upper_CI'], alpha = 0.3,label='std_hig_CI')
plt.plot(e7_hig['date'],e7_hig['cost_mean'],lw=2, alpha = 1,label='e7_hig')
plt.fill_between(e7_hig['date'], e7_hig['low_CI'], e7_hig['upper_CI'], alpha = 0.3,label='e7_hig_CI')
plt.title('STD vs E7, hig')
plt.legend(loc='best',fontsize=10)
plt.savefig('hig.jpg')
time_e7_72 = pd.read_csv('time_e7_72.csv')
e7_low_72 = time_e7_72[time_e7_72['usage']=='low'][['date','cost_mean']].groupby('date').mean()
e7_med_72 = time_e7_72[time_e7_72['usage']=='med'][['date','cost_mean']].groupby('date').mean()
e7_hig_72 = time_e7_72[time_e7_72['usage']=='hig'][['date','cost_mean']].groupby('date').mean()
e7_low_72.index=pd.to_datetime(e7_low_72.index)
e7_hig_72.index=pd.to_datetime(e7_hig_72.index)
e7_med_72.index=pd.to_datetime(e7_med_72.index)
e7_low_72.reset_index(inplace=True)
e7_med_72.reset_index(inplace=True)
e7_hig_72.reset_index(inplace=True)
#Create confidence interval
e7_low_72['low_CI'] = e7_low_72['cost_mean'] - np.std(e7_low_72['cost_mean'])
e7_low_72['upper_CI'] = e7_low_72['cost_mean'] + np.std(e7_low_72['cost_mean'])
e7_med_72['low_CI'] = e7_med_72['cost_mean'] - np.std(e7_med_72['cost_mean'])
e7_med_72['upper_CI'] = e7_med_72['cost_mean'] + np.std(e7_med_72['cost_mean'])
e7_hig_72['low_CI'] = e7_hig_72['cost_mean'] - np.std(e7_hig_72['cost_mean'])
e7_hig_72['upper_CI'] = e7_hig_72['cost_mean'] + np.std(e7_hig_72['cost_mean'])
e7_low_72 = e7_low_72[e7_low_72['date']>='20120201']
e7_med_72 = e7_med_72[e7_med_72['date']>='20120201']
e7_hig_72 = e7_hig_72[e7_hig_72['date']>='20120201']
plt.figure(figsize=(15,5),dpi=300)
plt.plot(std_low['date'],std_low['cost_mean'],lw=2, alpha = 1,label='std_low')
plt.fill_between(std_low['date'], std_low['low_CI'], std_low['upper_CI'], alpha = 0.3,label='std_low_CI')
plt.plot(e7_low_72['date'],e7_low_72['cost_mean'],lw=2, alpha = 1,label='e7_low')
plt.fill_between(e7_low_72['date'], e7_low_72['low_CI'], e7_low_72['upper_CI'], alpha = 0.3,label='e7_low_CI')
plt.title('STD vs E7, low72')
plt.legend(loc='best',fontsize=10)
plt.savefig('low_72.jpg')
plt.figure(figsize=(15,5),dpi=300)
plt.plot(std_med['date'],std_med['cost_mean'],lw=2, alpha = 1,label='std_med')
plt.fill_between(std_med['date'], std_med['low_CI'], std_med['upper_CI'], alpha = 0.3,label='std_med_CI')
plt.plot(e7_med_72['date'],e7_med_72['cost_mean'],lw=2, alpha = 1,label='e7_med')
plt.fill_between(e7_med_72['date'], e7_med_72['low_CI'], e7_med_72['upper_CI'], alpha = 0.3,label='e7_med_CI')
plt.title('STD vs E7, med72')
plt.legend(loc='best',fontsize=10)
plt.savefig('med72.jpg')
plt.figure(figsize=(15,5),dpi=300)
plt.plot(std_hig['date'],std_hig['cost_mean'],lw=2, alpha = 1,label='std_hig')
plt.fill_between(std_hig['date'], std_hig['low_CI'], std_hig['upper_CI'], alpha = 0.3,label='std_hig_CI')
plt.plot(e7_hig_72['date'],e7_hig_72['cost_mean'],lw=2, alpha = 1,label='e7_hig')
plt.fill_between(e7_hig_72['date'], e7_hig_72['low_CI'], e7_hig_72['upper_CI'], alpha = 0.3,label='e7_hig_CI')
plt.title('STD vs E7, hig72')
plt.legend(loc='best',fontsize=10)
plt.savefig('hig72.jpg')
0.75/0.25
time_e7_75 = pd.read_csv('time_e7_75.csv')
e7_low_75 = time_e7_75[time_e7_75['usage']=='low'][['date','cost_mean']].groupby('date').mean()
e7_med_75 = time_e7_75[time_e7_75['usage']=='med'][['date','cost_mean']].groupby('date').mean()
e7_hig_75 = time_e7_75[time_e7_75['usage']=='hig'][['date','cost_mean']].groupby('date').mean()
e7_low_75.index=pd.to_datetime(e7_low_75.index)
e7_hig_75.index=pd.to_datetime(e7_hig_75.index)
e7_med_75.index=pd.to_datetime(e7_med_75.index)
e7_low_75.reset_index(inplace=True)
e7_med_75.reset_index(inplace=True)
e7_hig_75.reset_index(inplace=True)
#Create confidence interval
e7_low_75['low_CI'] = e7_low_75['cost_mean'] - np.std(e7_low_75['cost_mean'])
e7_low_75['upper_CI'] = e7_low_75['cost_mean'] + np.std(e7_low_75['cost_mean'])
e7_med_75['low_CI'] = e7_med_75['cost_mean'] - np.std(e7_med_75['cost_mean'])
e7_med_75['upper_CI'] = e7_med_75['cost_mean'] + np.std(e7_med_75['cost_mean'])
e7_hig_75['low_CI'] = e7_hig_75['cost_mean'] - np.std(e7_hig_75['cost_mean'])
e7_hig_75['upper_CI'] = e7_hig_75['cost_mean'] + np.std(e7_hig_75['cost_mean'])
e7_low_75 = e7_low_75[e7_low_75['date']>='20120201']
e7_med_75 = e7_med_75[e7_med_75['date']>='20120201']
e7_hig_75 = e7_hig_75[e7_hig_75['date']>='20120201']
plt.figure(figsize=(15,5),dpi=300)
plt.plot(std_low['date'],std_low['cost_mean'],lw=2, alpha = 1,label='std_low')
plt.fill_between(std_low['date'], std_low['low_CI'], std_low['upper_CI'], alpha = 0.3,label='std_low_CI')
plt.plot(e7_low_75['date'],e7_low_75['cost_mean'],lw=2, alpha = 1,label='e7_low')
plt.fill_between(e7_low_75['date'], e7_low_75['low_CI'], e7_low_75['upper_CI'], alpha = 0.3,label='e7_low_CI')
plt.title('STD vs E7, low75')
plt.legend(loc='best',fontsize=10)
plt.savefig('low_75.jpg')
plt.figure(figsize=(15,5),dpi=300)
plt.plot(std_med['date'],std_med['cost_mean'],lw=2, alpha = 1,label='std_med')
plt.fill_between(std_med['date'], std_med['low_CI'], std_med['upper_CI'], alpha = 0.3,label='std_med_CI')
plt.plot(e7_med_75['date'],e7_med_75['cost_mean'],lw=2, alpha = 1,label='e7_med')
plt.fill_between(e7_med_75['date'], e7_med_75['low_CI'], e7_med_75['upper_CI'], alpha = 0.3,label='e7_med_CI')
plt.title('STD vs E7, med75')
plt.legend(loc='best',fontsize=10)
plt.savefig('med75.jpg')
plt.figure(figsize=(15,5),dpi=300)
plt.plot(std_hig['date'],std_hig['cost_mean'],lw=2, alpha = 1,label='std_hig')
plt.fill_between(std_hig['date'], std_hig['low_CI'], std_hig['upper_CI'], alpha = 0.3,label='std_hig_CI')
plt.plot(e7_hig_75['date'],e7_hig_75['cost_mean'],lw=2, alpha = 1,label='e7_hig')
plt.fill_between(e7_hig_75['date'], e7_hig_75['low_CI'], e7_hig_75['upper_CI'], alpha = 0.3,label='e7_hig_CI')
plt.title('STD vs E7, hig75')
plt.legend(loc='best',fontsize=10)
plt.savefig('hig75.jpg')
0.8/0.2
time_e7_80 = pd.read_csv('time_e7_80.csv')
e7_low_80 = time_e7_80[time_e7_80['usage']=='low'][['date','cost_mean']].groupby('date').mean()
e7_med_80 = time_e7_80[time_e7_80['usage']=='med'][['date','cost_mean']].groupby('date').mean()
e7_hig_80 = time_e7_80[time_e7_80['usage']=='hig'][['date','cost_mean']].groupby('date').mean()
e7_low_80.index=pd.to_datetime(e7_low_80.index)
e7_hig_80.index=pd.to_datetime(e7_hig_80.index)
e7_med_80.index=pd.to_datetime(e7_med_80.index)
e7_low_80.reset_index(inplace=True)
e7_med_80.reset_index(inplace=True)
e7_hig_80.reset_index(inplace=True)
#Create confidence interval
e7_low_80['low_CI'] = e7_low_80['cost_mean'] - np.std(e7_low_80['cost_mean'])
e7_low_80['upper_CI'] = e7_low_80['cost_mean'] + np.std(e7_low_80['cost_mean'])
e7_med_80['low_CI'] = e7_med_80['cost_mean'] - np.std(e7_med_80['cost_mean'])
e7_med_80['upper_CI'] = e7_med_80['cost_mean'] + np.std(e7_med_80['cost_mean'])
e7_hig_80['low_CI'] = e7_hig_80['cost_mean'] - np.std(e7_hig_80['cost_mean'])
e7_hig_80['upper_CI'] = e7_hig_80['cost_mean'] + np.std(e7_hig_80['cost_mean'])
e7_low_80 = e7_low_80[e7_low_80['date']>='20120201']
e7_med_80 = e7_med_80[e7_med_80['date']>='20120201']
e7_hig_80 = e7_hig_80[e7_hig_80['date']>='20120201']
plt.figure(figsize=(15,5),dpi=300)
plt.plot(std_low['date'],std_low['cost_mean'],lw=2, alpha = 1,label='std_low')
plt.fill_between(std_low['date'], std_low['low_CI'], std_low['upper_CI'], alpha = 0.3,label='std_low_CI')
plt.plot(e7_low_80['date'],e7_low_80['cost_mean'],lw=2, alpha = 1,label='e7_low')
plt.fill_between(e7_low_80['date'], e7_low_80['low_CI'], e7_low_80['upper_CI'], alpha = 0.3,label='e7_low_CI')
plt.title('STD vs E7, low80')
plt.legend(loc='best',fontsize=10)
plt.savefig('low_80.jpg')
plt.figure(figsize=(15,5),dpi=300)
plt.plot(std_med['date'],std_med['cost_mean'],lw=2, alpha = 1,label='std_med')
plt.fill_between(std_med['date'], std_med['low_CI'], std_med['upper_CI'], alpha = 0.3,label='std_med_CI')
plt.plot(e7_med_80['date'],e7_med_80['cost_mean'],lw=2, alpha = 1,label='e7_med')
plt.fill_between(e7_med_80['date'], e7_med_80['low_CI'], e7_med_80['upper_CI'], alpha = 0.3,label='e7_med_CI')
plt.title('STD vs E7, med80')
plt.legend(loc='best',fontsize=10)
plt.savefig('med80.jpg')
plt.figure(figsize=(15,5),dpi=300)
plt.plot(std_hig['date'],std_hig['cost_mean'],lw=2, alpha = 1,label='std_hig')
plt.fill_between(std_hig['date'], std_hig['low_CI'], std_hig['upper_CI'], alpha = 0.3,label='std_hig_CI')
plt.plot(e7_hig_80['date'],e7_hig_80['cost_mean'],lw=2, alpha = 1,label='e7_hig')
plt.fill_between(e7_hig_80['date'], e7_hig_80['low_CI'], e7_hig_80['upper_CI'], alpha = 0.3,label='e7_hig_CI')
plt.title('STD vs E7, hig80')
plt.legend(loc='best',fontsize=10)
plt.savefig('hig80.jpg')
training set and testing set to test which model is better
choose the arima to make forecast
import pandas as pd
import numpy as np
import datetime as dt
from statsmodels.tsa.seasonal import seasonal_decompose
from matplotlib.pylab import style
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import style
plt.rcParams['savefig.dpi'] = 500
plt.rcParams['figure.dpi'] = 500
style.use('seaborn')
import warnings; warnings.simplefilter('ignore')
%matplotlib inline
std_train = std_med.copy()
std_train = std_train[['date','cost_mean']]
std_train.index = std_train.date
del std_train['date']
# choose the data from '20180101' to make predictions
std_train = std_train[std_train.index>'20180101']
std_train.plot(figsize=(15,5))
# decomposition
decomposition =seasonal_decompose(std_train,model='mul')
std_train.trend = decomposition.trend
std_train.seasonal = decomposition.seasonal
std_train.residual = decomposition.resid
plt.figure(figsize=(15,9),dpi=200)
plt.subplot(311)
plt.plot(std_train.trend)
plt.title('trend')
plt.subplot(312)
plt.plot(std_train.seasonal)
plt.title('seasonal')
plt.subplot(313)
plt.plot(std_train.residual)
plt.title('residual')
#first order difference
train_diff = std_train.diff()
train_diff = train_diff.dropna()
plt.figure(dpi=200,figsize=(15,5))
plt.plot(train_diff)
plt.title('first order diff')
plot_acf(train_diff,lags=20)
plot_pacf(train_diff,lags=20)
# search for Optimal parameters p,d,q
import numpy as np
import itertools
p_min = 2
d_min = 1
q_min = 2
p_max = 8
d_max = 1
q_max = 8
# Initialize a DataFrame to store the results
results_bic = pd.DataFrame(index=['AR{}'.format(i) for i in range(p_min,p_max+1)],
columns=['MA{}'.format(i) for i in range(q_min,q_max+1)])
for p,d,q in itertools.product(range(p_min,p_max+1),
range(d_min,d_max+1),
range(q_min,q_max+1)):
if p==0 and d==0 and q==0:
results_bic.loc['AR{}'.format(p), 'MA{}'.format(q)] = np.nan
continue
try:
model = ARIMA(std_train, order=(p, d, q),
#enforce_stationarity=False,
#enforce_invertibility=False,
)
results = model.fit()
results_bic.loc['AR{}'.format(p), 'MA{}'.format(q)] = results.bic
except:
continue
results_bic = results_bic[results_bic.columns].astype(float)
fig, ax = plt.subplots(figsize=(10, 8),dpi=200)
ax = sns.heatmap(results_bic,
mask=results_bic.isnull(),
ax=ax,
annot=True,
fmt='.2f',
);
ax.set_title('BIC');
plt.savefig('ARIMA_std.png')
model_std = ARIMA(std_train, order=(2, 1, 2))
result_std = model_std.fit()
print(result_std.summary())
e772_train = e7_med_72[['date','cost_mean']]
e772_train.index = e772_train.date
e772_train = e7_med_72[['date','cost_mean']]
e772_train.index = e772_train.date
del e772_train['date']
del e772_train['date']
e772_train = e772_train[e772_train.index>'20180101']
e772_train.plot(figsize=(15,5))
# decomposition
decomposition =seasonal_decompose(e772_train,model='mul')
e772_train.trend = decomposition.trend
e772_train.seasonal = decomposition.seasonal
e772_train.residual = decomposition.resid
plt.figure(figsize=(15,9),dpi=200)
plt.subplot(311)
plt.plot(e772_train.trend)
plt.title('trend')
plt.subplot(312)
plt.plot(e772_train.seasonal)
plt.title('seasonal')
plt.subplot(313)
plt.plot(e772_train.residual)
plt.title('residual')
# search for Optimal parameters p,d,q
import numpy as np
import itertools
p_min = 2
d_min = 1
q_min = 2
p_max = 8
d_max = 1
q_max = 8
# Initialize a DataFrame to store the results
results_bic = pd.DataFrame(index=['AR{}'.format(i) for i in range(p_min,p_max+1)],
columns=['MA{}'.format(i) for i in range(q_min,q_max+1)])
for p,d,q in itertools.product(range(p_min,p_max+1),
range(d_min,d_max+1),
range(q_min,q_max+1)):
if p==0 and d==0 and q==0:
results_bic.loc['AR{}'.format(p), 'MA{}'.format(q)] = np.nan
continue
try:
model = ARIMA(e772_train, order=(p, d, q),
#enforce_stationarity=False,
#enforce_invertibility=False,
)
results = model.fit()
results_bic.loc['AR{}'.format(p), 'MA{}'.format(q)] = results.bic
except:
continue
results_bic = results_bic[results_bic.columns].astype(float)
fig, ax = plt.subplots(figsize=(10, 8),dpi=200)
ax = sns.heatmap(results_bic,
mask=results_bic.isnull(),
ax=ax,
annot=True,
fmt='.2f',
);
ax.set_title('BIC');
plt.savefig('ARIMA_e772.png')
model_e772 = ARIMA(e772_train, order=(2, 1, 2))
result_e772 = model_e772.fit()
print(result_std.summary())
e775_train = e7_med_75[['date','cost_mean']]
e775_train.index = e775_train.date
e775_train = e7_med_75[['date','cost_mean']]
e775_train.index = e775_train.date
del e775_train['date']
e775_train = e775_train[e775_train.index>'20180101']
e775_train.plot(figsize=(15,5))
# decomposition
decomposition =seasonal_decompose(e775_train,model='mul')
e775_train.trend = decomposition.trend
e775_train.seasonal = decomposition.seasonal
e775_train.residual = decomposition.resid
plt.figure(figsize=(15,9),dpi=200)
plt.subplot(311)
plt.plot(e775_train.trend)
plt.title('trend')
plt.subplot(312)
plt.plot(e775_train.seasonal)
plt.title('seasonal')
plt.subplot(313)
plt.plot(e775_train.residual)
plt.title('residual')
# search for Optimal parameters p,d,q
import numpy as np
import itertools
p_min = 2
d_min = 1
q_min = 2
p_max = 8
d_max = 1
q_max = 8
# Initialize a DataFrame to store the results
results_bic = pd.DataFrame(index=['AR{}'.format(i) for i in range(p_min,p_max+1)],
columns=['MA{}'.format(i) for i in range(q_min,q_max+1)])
for p,d,q in itertools.product(range(p_min,p_max+1),
range(d_min,d_max+1),
range(q_min,q_max+1)):
if p==0 and d==0 and q==0:
results_bic.loc['AR{}'.format(p), 'MA{}'.format(q)] = np.nan
continue
try:
model = ARIMA(e775_train, order=(p, d, q),
#enforce_stationarity=False,
#enforce_invertibility=False,
)
results = model.fit()
results_bic.loc['AR{}'.format(p), 'MA{}'.format(q)] = results.bic
except:
continue
results_bic = results_bic[results_bic.columns].astype(float)
fig, ax = plt.subplots(figsize=(10, 8),dpi=200)
ax = sns.heatmap(results_bic,
mask=results_bic.isnull(),
ax=ax,
annot=True,
fmt='.2f',
);
ax.set_title('BIC');
plt.savefig('ARIMA_e775.png')
model_e775 = ARIMA(e775_train, order=(2, 1, 2))
result_e775 = model_e775.fit()
print(result_std.summary())
pred_index = pd.date_range(start='2019-07-18', periods=60)
# Build Model
fitted_std = model_std.fit(disp=-1)
fitted_e775 = model_e775.fit(disp=-1)
# Forecast
fc_std, se, conf = fitted_std.forecast(60, alpha=0.05) # 95% conf
fc_e775, se_e775, conf_e775 = fitted_e775.forecast(60, alpha=0.05) # 95% conf
# Plot
plt.figure(figsize=(15,5), dpi=200)
plt.plot(std_train, label='training_std')
fc_series_std = pd.Series(fc_std, index=pred_index)
plt.plot(e775_train, label='training_e775')
fc_series_e775 = pd.Series(fc_e775, index=pred_index)
lower_series_std = pd.Series(conf[:, 0], index=pred_index)
upper_series_std = pd.Series(conf[:, 1], index=pred_index)
lower_series_e775 = pd.Series(conf_e775[:, 0], index=pred_index)
upper_series_e775 = pd.Series(conf_e775[:, 1], index=pred_index)
plt.plot(fc_series_std, label='forecast_std')
plt.fill_between(lower_series_std.index, lower_series_std, upper_series_std,
color='r', alpha=.15)
plt.plot(fc_series_e775, label='forecast_e775')
plt.fill_between(lower_series_e775.index, lower_series_e775, upper_series_e775,
color='b', alpha=.15)
plt.title('Forecast vs Actuals, 75%')
plt.legend(loc='upper left', fontsize=8)
# Build Model
fitted_std = model_std.fit(disp=-1)
fitted_e772 = model_e772.fit(disp=-1)
# Forecast
fc_std, se, conf = fitted_std.forecast(60, alpha=0.05) # 95% conf
fc_e772, se_e772, conf_e772 = fitted_e772.forecast(60, alpha=0.05) # 95% conf
# Plot
plt.figure(figsize=(15,5), dpi=100)
plt.plot(std_train, label='training_std')
fc_series_std = pd.Series(fc_std, index=pred_index)
plt.plot(e772_train, label='training_e772')
fc_series_e772 = pd.Series(fc_e772, index=pred_index)
lower_series_std = pd.Series(conf[:, 0], index=pred_index)
upper_series_std = pd.Series(conf[:, 1], index=pred_index)
lower_series_e772 = pd.Series(conf_e772[:, 0], index=pred_index)
upper_series_e772 = pd.Series(conf_e772[:, 1], index=pred_index)
plt.plot(fc_series_std, label='forecast_std')
plt.fill_between(lower_series_std.index, lower_series_std, upper_series_std,
color='r', alpha=.15)
plt.plot(fc_series_e772, label='forecast_e772')
plt.fill_between(lower_series_e772.index, lower_series_e772, upper_series_e772,
color='b', alpha=.15)
plt.title('Forecast vs Actuals, 72%')
plt.legend(loc='upper left', fontsize=8)